{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Lottery prediction with LSTM model\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "made by J-D Park, Yonsei CSE Ph.D Student"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 225,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load dependacies\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn import preprocessing\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from matplotlib import pyplot\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.layers import Dense\n",
    "from tensorflow.keras.layers import LSTM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load raw data\n",
    "# os.getcwd()\n",
    "rawdata = pd.read_csv('dataset/lottery_history.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>bonus</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>959</td>\n",
       "      <td>1</td>\n",
       "      <td>14</td>\n",
       "      <td>15</td>\n",
       "      <td>24</td>\n",
       "      <td>40</td>\n",
       "      <td>41</td>\n",
       "      <td>35</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>958</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>10</td>\n",
       "      <td>16</td>\n",
       "      <td>35</td>\n",
       "      <td>37</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>957</td>\n",
       "      <td>4</td>\n",
       "      <td>15</td>\n",
       "      <td>24</td>\n",
       "      <td>35</td>\n",
       "      <td>36</td>\n",
       "      <td>40</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>956</td>\n",
       "      <td>10</td>\n",
       "      <td>11</td>\n",
       "      <td>20</td>\n",
       "      <td>21</td>\n",
       "      <td>25</td>\n",
       "      <td>41</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>955</td>\n",
       "      <td>4</td>\n",
       "      <td>9</td>\n",
       "      <td>23</td>\n",
       "      <td>26</td>\n",
       "      <td>29</td>\n",
       "      <td>33</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>954</th>\n",
       "      <td>5</td>\n",
       "      <td>16</td>\n",
       "      <td>24</td>\n",
       "      <td>29</td>\n",
       "      <td>40</td>\n",
       "      <td>41</td>\n",
       "      <td>42</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>955</th>\n",
       "      <td>4</td>\n",
       "      <td>14</td>\n",
       "      <td>27</td>\n",
       "      <td>30</td>\n",
       "      <td>31</td>\n",
       "      <td>40</td>\n",
       "      <td>42</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>956</th>\n",
       "      <td>3</td>\n",
       "      <td>11</td>\n",
       "      <td>16</td>\n",
       "      <td>19</td>\n",
       "      <td>21</td>\n",
       "      <td>27</td>\n",
       "      <td>31</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>957</th>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>21</td>\n",
       "      <td>25</td>\n",
       "      <td>32</td>\n",
       "      <td>42</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>958</th>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>23</td>\n",
       "      <td>29</td>\n",
       "      <td>33</td>\n",
       "      <td>37</td>\n",
       "      <td>40</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>959 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Unnamed: 0   1   2   3   4   5   6  bonus\n",
       "0           959   1  14  15  24  40  41     35\n",
       "1           958   2   9  10  16  35  37      1\n",
       "2           957   4  15  24  35  36  40      1\n",
       "3           956  10  11  20  21  25  41     40\n",
       "4           955   4   9  23  26  29  33      8\n",
       "..          ...  ..  ..  ..  ..  ..  ..    ...\n",
       "954           5  16  24  29  40  41  42      3\n",
       "955           4  14  27  30  31  40  42      2\n",
       "956           3  11  16  19  21  27  31     30\n",
       "957           2   9  13  21  25  32  42      2\n",
       "958           1  10  23  29  33  37  40     16\n",
       "\n",
       "[959 rows x 8 columns]"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rawdata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sort with ascending order\n",
    "rawdata = rawdata.sort_values(by=['Unnamed: 0'], axis=0)\n",
    "rawnp = rawdata.to_numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[10 23 29 ... 37 40 16]\n",
      " [ 9 13 21 ... 32 42  2]\n",
      " [11 16 19 ... 27 31 30]\n",
      " ...\n",
      " [ 4 15 24 ... 36 40  1]\n",
      " [ 2  9 10 ... 35 37  1]\n",
      " [ 1 14 15 ... 40 41 35]]\n"
     ]
    }
   ],
   "source": [
    "rawnp_proc = rawnp[:,1:]\n",
    "print(rawnp_proc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "# to construct one-hot encoded input dataset\n",
    "inputnp = np.zeros((len(rawnp),45))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "# convert to one-hot features (multi-variate time-series)\n",
    "i = 0\n",
    "for row in rawnp_proc:\n",
    "    for elem in row:\n",
    "        #assign one-hot\n",
    "        inputnp[i, elem-1] = 1\n",
    "    i += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0., 0., 0., ..., 0., 0., 0.],\n",
       "       [0., 1., 0., ..., 0., 0., 0.],\n",
       "       [0., 0., 0., ..., 0., 0., 0.],\n",
       "       ...,\n",
       "       [1., 0., 0., ..., 0., 0., 0.],\n",
       "       [1., 1., 0., ..., 0., 0., 0.],\n",
       "       [1., 0., 0., ..., 0., 0., 0.]])"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#final output\n",
    "inputnp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "# save to the local\n",
    "np.save('data.npy', inputnp)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "About model from now on -> future migration of codes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 290,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = np.load('data.npy')\n",
    "dataset_pd = pd.DataFrame(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 291,
   "metadata": {},
   "outputs": [],
   "source": [
    "def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):\n",
    "  n_vars = 1 if type(data) is list else data.shape[1]\n",
    "  df = pd.DataFrame(data)\n",
    "  cols, names = list(), list()\n",
    "  # input sequence (t-n, ... t-1)\n",
    "  for i in range(n_in, 0, -1):\n",
    "      cols.append(df.shift(i))\n",
    "      names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]\n",
    "  # forecast sequence (t, t+1, ... t+n)\n",
    "  for i in range(0, n_out):\n",
    "      cols.append(df.shift(-i))\n",
    "      if i == 0:\n",
    "          names += [('var%d(t)' % (j+1)) for j in range(n_vars)]\n",
    "      else:\n",
    "          names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]\n",
    "  # put it all together\n",
    "  agg = pd.concat(cols, axis=1)\n",
    "  agg.columns = names\n",
    "  # drop rows with NaN values\n",
    "  if dropnan:\n",
    "      agg.dropna(inplace=True)\n",
    "  return agg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 292,
   "metadata": {},
   "outputs": [],
   "source": [
    "reframed = series_to_supervised(dataset, 1, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 293,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>var1(t-1)</th>\n",
       "      <th>var2(t-1)</th>\n",
       "      <th>var3(t-1)</th>\n",
       "      <th>var4(t-1)</th>\n",
       "      <th>var5(t-1)</th>\n",
       "      <th>var6(t-1)</th>\n",
       "      <th>var7(t-1)</th>\n",
       "      <th>var8(t-1)</th>\n",
       "      <th>var9(t-1)</th>\n",
       "      <th>var10(t-1)</th>\n",
       "      <th>...</th>\n",
       "      <th>var36(t)</th>\n",
       "      <th>var37(t)</th>\n",
       "      <th>var38(t)</th>\n",
       "      <th>var39(t)</th>\n",
       "      <th>var40(t)</th>\n",
       "      <th>var41(t)</th>\n",
       "      <th>var42(t)</th>\n",
       "      <th>var43(t)</th>\n",
       "      <th>var44(t)</th>\n",
       "      <th>var45(t)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>954</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>955</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>956</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>957</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>958</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>958 rows × 90 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     var1(t-1)  var2(t-1)  var3(t-1)  var4(t-1)  var5(t-1)  var6(t-1)  \\\n",
       "1          0.0        0.0        0.0        0.0        0.0        0.0   \n",
       "2          0.0        1.0        0.0        0.0        0.0        0.0   \n",
       "3          0.0        0.0        0.0        0.0        0.0        0.0   \n",
       "4          0.0        1.0        0.0        0.0        0.0        0.0   \n",
       "5          0.0        0.0        1.0        0.0        0.0        0.0   \n",
       "..         ...        ...        ...        ...        ...        ...   \n",
       "954        1.0        0.0        0.0        0.0        0.0        0.0   \n",
       "955        0.0        0.0        0.0        1.0        0.0        0.0   \n",
       "956        0.0        0.0        0.0        0.0        0.0        0.0   \n",
       "957        1.0        0.0        0.0        1.0        0.0        0.0   \n",
       "958        1.0        1.0        0.0        0.0        0.0        0.0   \n",
       "\n",
       "     var7(t-1)  var8(t-1)  var9(t-1)  var10(t-1)  ...  var36(t)  var37(t)  \\\n",
       "1          0.0        0.0        0.0         1.0  ...       0.0       0.0   \n",
       "2          0.0        0.0        1.0         0.0  ...       0.0       0.0   \n",
       "3          0.0        0.0        0.0         0.0  ...       0.0       0.0   \n",
       "4          0.0        0.0        0.0         0.0  ...       0.0       0.0   \n",
       "5          0.0        0.0        0.0         0.0  ...       0.0       0.0   \n",
       "..         ...        ...        ...         ...  ...       ...       ...   \n",
       "954        0.0        0.0        1.0         0.0  ...       0.0       0.0   \n",
       "955        0.0        1.0        1.0         0.0  ...       0.0       0.0   \n",
       "956        0.0        0.0        0.0         1.0  ...       1.0       0.0   \n",
       "957        0.0        0.0        0.0         0.0  ...       0.0       1.0   \n",
       "958        0.0        0.0        1.0         1.0  ...       0.0       0.0   \n",
       "\n",
       "     var38(t)  var39(t)  var40(t)  var41(t)  var42(t)  var43(t)  var44(t)  \\\n",
       "1         0.0       0.0       0.0       0.0       1.0       0.0       0.0   \n",
       "2         0.0       0.0       0.0       0.0       0.0       0.0       0.0   \n",
       "3         0.0       0.0       1.0       0.0       1.0       0.0       0.0   \n",
       "4         0.0       0.0       1.0       1.0       1.0       0.0       0.0   \n",
       "5         0.0       0.0       1.0       0.0       1.0       0.0       0.0   \n",
       "..        ...       ...       ...       ...       ...       ...       ...   \n",
       "954       0.0       0.0       0.0       0.0       0.0       0.0       0.0   \n",
       "955       0.0       0.0       1.0       1.0       0.0       0.0       0.0   \n",
       "956       0.0       0.0       1.0       0.0       0.0       0.0       0.0   \n",
       "957       0.0       0.0       0.0       0.0       0.0       0.0       0.0   \n",
       "958       0.0       0.0       1.0       1.0       0.0       0.0       0.0   \n",
       "\n",
       "     var45(t)  \n",
       "1         0.0  \n",
       "2         0.0  \n",
       "3         0.0  \n",
       "4         0.0  \n",
       "5         0.0  \n",
       "..        ...  \n",
       "954       0.0  \n",
       "955       0.0  \n",
       "956       0.0  \n",
       "957       0.0  \n",
       "958       0.0  \n",
       "\n",
       "[958 rows x 90 columns]"
      ]
     },
     "execution_count": 293,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reframed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 294,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ENTIRE_NUMBER = 45\n",
    "\n",
    "# values = reframed.values\n",
    "# n_train_hours = int(0.8 * len(dataset))\n",
    "# train = values[:n_train_hours, :]\n",
    "# test = values[n_train_hours:, :]\n",
    "# # split into input and outputs\n",
    "# train_X, train_y = train[:, :-1], train[:, -1]\n",
    "# test_X, test_y = test[:, :-1], test[:, -1]\n",
    "# # reshape input to be 3D [samples, timesteps, features]\n",
    "# train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))\n",
    "# test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 295,
   "metadata": {},
   "outputs": [],
   "source": [
    "ENTIRE_NUMBER = 45\n",
    "\n",
    "values = reframed.values\n",
    "n_train_hours = int(0.8 * len(dataset))\n",
    "train = values[:n_train_hours, :]\n",
    "test = values[n_train_hours:n_train_hours+1, :]\n",
    "# split into input and outputs\n",
    "train_X, train_y = train[:, :ENTIRE_NUMBER], train[:, ENTIRE_NUMBER:]\n",
    "test_X, test_y = test[:, :ENTIRE_NUMBER], test[:, ENTIRE_NUMBER:]\n",
    "# reshape input to be 3D [samples, timesteps, features]\n",
    "train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))\n",
    "test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 296,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "45"
      ]
     },
     "execution_count": 296,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_X.shape[2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 297,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = Sequential()\n",
    "model.add(LSTM(128, input_shape=(train_X.shape[1], train_X.shape[2])))\n",
    "model.add(Dense(ENTIRE_NUMBER))\n",
    "model.compile(loss='mse', optimizer='adam')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 298,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/10\n",
      "96/96 - 0s - loss: 0.1386\n",
      "Epoch 2/10\n",
      "96/96 - 0s - loss: 0.1308\n",
      "Epoch 3/10\n",
      "96/96 - 0s - loss: 0.1291\n",
      "Epoch 4/10\n",
      "96/96 - 0s - loss: 0.1279\n",
      "Epoch 5/10\n",
      "96/96 - 0s - loss: 0.1269\n",
      "Epoch 6/10\n",
      "96/96 - 0s - loss: 0.1261\n",
      "Epoch 7/10\n",
      "96/96 - 0s - loss: 0.1254\n",
      "Epoch 8/10\n",
      "96/96 - 0s - loss: 0.1248\n",
      "Epoch 9/10\n",
      "96/96 - 0s - loss: 0.1243\n",
      "Epoch 10/10\n",
      "96/96 - 0s - loss: 0.1237\n"
     ]
    }
   ],
   "source": [
    "history = model.fit(train_X, train_y, epochs=10, batch_size=8, \n",
    "         verbose=2, shuffle=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 299,
   "metadata": {},
   "outputs": [],
   "source": [
    "yhat = model.predict(test_X)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 300,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.1883061 ,  0.05320678,  0.16360234,  0.14108801,  0.12809163,\n",
       "         0.35185164,  0.2269278 ,  0.13521384,  0.07440365,  0.19711213,\n",
       "         0.3528889 ,  0.06874692,  0.23552819,  0.29494694, -0.01834923,\n",
       "         0.16106597,  0.03067926,  0.10306194,  0.04760702,  0.19481601,\n",
       "         0.15245   , -0.07876786,  0.12156203,  0.25573358,  0.20910658,\n",
       "         0.26953048,  0.29428062,  0.01463741,  0.19032341,  0.13220221,\n",
       "         0.19059548,  0.16392511,  0.23256163,  0.03474504,  0.20236427,\n",
       "         0.07094978,  0.03037872,  0.18603517,  0.13383919,  0.23467389,\n",
       "         0.15983039,  0.09181807,  0.17738949,  0.2826555 ,  0.09778278]],\n",
       "      dtype=float32)"
      ]
     },
     "execution_count": 300,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "yhat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 301,
   "metadata": {},
   "outputs": [],
   "source": [
    "yhat_assigned = np.argsort(-yhat[:])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 302,
   "metadata": {},
   "outputs": [],
   "source": [
    "ww = yhat_assigned[:,:6]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 303,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[10,  5, 13, 26, 43, 25]], dtype=int64)"
      ]
     },
     "execution_count": 303,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predicted"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 304,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[10,  5, 25, 13, 26,  9]], dtype=int64)"
      ]
     },
     "execution_count": 304,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gth"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 305,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10\n",
      "aa\n",
      "5\n",
      "aa\n",
      "13\n",
      "aa\n",
      "26\n",
      "aa\n",
      "43\n",
      "bb\n",
      "25\n",
      "aa\n"
     ]
    }
   ],
   "source": [
    "for i in ww[0]:\n",
    "    print(i)\n",
    "    if i in gth:\n",
    "        print('aa')\n",
    "    else:\n",
    "        print('bb')\n",
    "    \n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 306,
   "metadata": {},
   "outputs": [],
   "source": [
    "gth = np.argsort(-test_y[:])\n",
    "gth = yhat_assigned[:,:6]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 307,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6"
      ]
     },
     "execution_count": 307,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(gth.T)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 308,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1.,\n",
       "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
       "        0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1.]])"
      ]
     },
     "execution_count": 308,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 309,
   "metadata": {},
   "outputs": [],
   "source": [
    "# yhat = model.predict(test_X)\n",
    "# test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))\n",
    "# # invert scaling for forecast\n",
    "# inv_yhat = np.concatenate((yhat, test_X[:, 1:]), axis=1)\n",
    "# #inv_yhat = scaler.inverse_transform(inv_yhat)\n",
    "# inv_yhat = inv_yhat[:,0]\n",
    "# # invert scaling for actual\n",
    "# test_y = test_y.reshape((len(test_y), 1))\n",
    "# inv_y = np.concatenate((test_y, test_X[:, 1:]), axis=1)\n",
    "# #inv_y = scaler.inverse_transform(inv_y)\n",
    "# inv_y = inv_y[:,0]\n",
    "# # calculate RMSE\n",
    "# rmse = np.sqrt(mean_squared_error(inv_y, inv_yhat))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 240,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "89"
      ]
     },
     "execution_count": 240,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_X.shape[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
